#!/bin/sh
# Load the shared test helpers from the "common" file that sits next to this
# script.
# NOTE(review): test_phon, used by every test below, is not defined in this
# file; presumably it comes from "common". Its arguments appear to be
# <voice> <expected phonemes> <input text> [<label>] -- confirm there.
# "$0" is quoted inside $(...) so that a script path containing spaces or glob
# characters is handed to dirname as a single, unmodified argument.
. "$(dirname "$0")/common"

# word lookup
test_phon en "h@l'oU" "hello"

# correct word stress
test_phon en "s'VmTIN Imp'o@t@nt" "something important"

# suffix rules
test_phon lv \
	"nev'ie:na nuo\`_ 'eiRo:pas v'alsti:m nek,ad nav p'a:Rga:jusi nuo\`_ 'univERsa:la:s_:_: nuo\` v'ispa:Re:jiem n'uoduok_|l^iem f'inanse:tas_:_: s'iste:mas uz 'obliga:ta:s 'apdRuoSina:Sanas s'iste:mu b'eigas" \
	"Neviena no Eiropas valstīm nekad nav pārgājusi no universālās (no vispārējiem nodokļiem finansētas) sistēmas uz obligātās apdrošināšanas sistēmu Beigas." "suffix rules"

# A list entry that uses '$text' to map to another list entry crashes 1.49.2 multi-word support:
test_phon en "d'eIbju:tI2d" "débuted"
test_phon en-US "d'eIbju:t#I#d" "débuted"

# s at the end of abbreviations
# https://github.com/espeak-ng/espeak-ng/issues/706
test_phon en ",aIb,i:;'Em m'It 'Ib@mz m'Its ,aIb,i:;'Em ,Em,aIt'i:; ,eIp,i:;'eItS s,i:;,i:;'Es ,aIt,i:;'Eks ,aIb,i:;'Emz ,Em,aIt'i:z ,eIp,i:;'eItSIz s,i:;,i:;'EsIz ,aIt,i:;'EksIz" "ibm mit ibms mits IBM MIT APH CES ITX IBMs MIT's APHs CES's ITXs"
test_phon lv "'ibm m'it 'ibm-s m'its 'ibm m'it 'aph ts'Es 'it_ks 'ibm-s m'its 'aphs ts'Ess 'it_kss 'a>_:,ess_! ,a:_:,ess_!v'e:_:_:_: s'i:a:_ 'u>_:t,e:_:t,e:_:" "ibm mit ibms mits IBM MIT APH CES ITX IBMs MIT's APHs CES's ITXs AS ASV SIA utt"
test_phon ru "'Ibm (en),aIb,i:;'Em m'It 'Ib@mz m'Its ,aIb,i:;'Em ,Em,aIt'i:; ,eIp,i:;'eItS s,i:;,i:;'Es ,aIt,i:;'Eks 'Ib@mz m'Its 'afz s'EI2zI2z 'ItEksz(ru) sE#SE#'A" "ИБМ ibm mit ibms mits IBM MIT APH CES ITX IBMs MIT's APHs CES's ITXs США"

# bug: https://github.com/nvaccess/nvda/issues/7740
test_phon ta "'il." "ள்"
test_phon my "kon'i" "7"
test_phon ka "s'ami Sv'idi" "3 7"

# bug: https://github.com/nvaccess/nvda/issues/7805
test_phon hi "r'UcI" "रुचि"
test_phon hi "dUk'a:n" "दुकान"
test_phon hi "ka:n'u:n" "कानून"

# pull request: MatchRule: Do not overflow the text https://github.com/espeak-ng/espeak-ng/pull/1126
test_phon en "k'apIt@Lz" "capitals"

# bug: https://github.com/espeak-ng/espeak-ng/issues/824
test_phon ru "(en)s'i:(ru) n'ojl t'otS;ka# v'os;I3md;E2s;ats;'ejm m;,IlI;'onV#f_! p;,Its'ot SE#z;d;is;'jatd;'evI3t; t'ys;itS;_! dv;'es;t;I p;Vd;d;Is;'jattR;'i p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I V#d;'in_ p@-rats'Ent dv;'es;t;I t'otS;ka# t@-r;'ittsat;S'Es;t; t'ys;itS;_! vOs;Ims'ot s;Imn'AttsVt; p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# dv;'e t'ys;itS;i d;E2v;Itn'AttsVt; p@-rats'Ent dv;'es;t;I V#d;'in_ t'otS;ka# n'ojl tR;'iv'os;im;_!s;'ejm d;'evI3t;S'Es;t;_!tR;'i p@-rats'Ent dv'A (en)s'i:(ru) V#d;'in_ t'otS;ka# n'ojl tR;'iv'os;im;_!s;'ejm d;'evI3t;S'Es;t;_!tR;'i p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I V#d;'in_ p@-rats'Ent dv;'es;t;I t'otS;ka# tS;it'yr;ista# d;E2v;Itn'AttsVt; t'ys;itS;_! S,E#s;t;s'ot t@-r;'ittsat;s;'ejm p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# n'ojldv'A_! tR;'is;'ejm_!p;'At; p@-rats'Ent dv'AttsVt; (en)s'i:(ru) p@-rats'Ent dv;'es;t;I t'otS;ka# t@-r;'ittsVt; t'ys;itS;_! v'os;I3md;E2s;ats;'ejm p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# n'ojltS;It'yR;I_! p;'At;Vd;'in__!tS;It'yR;I p@-rats'Ent dv;'es;t;I t'otS;ka# p;,Its'ot v'os;I3md;E2s;atdv;'e t'ys;itS;i_! s;,Ims'ot t@-r;'ittsat;d;'evI3t; p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# dv;'es;t;I SE#stn'AttsVt; t'ys;itS;_! d;Iv;its'ot s'o@-rOkdv'A p@-rats'Ent dv;'es;t;I t'otS;ka# s;,Ims'ot p;Vd;d;Is;'jat t'ys;itS;_! p;,Its'ot d;iv;In'ostOtR;'i p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# tS;it'yr;ista# s;'emd;E2s;Vt t'ys;itS;_! t@-r;'ista# v'os;im; p@-rats'Ent dv;'es;t;I t'otS;ka# n'ojld;'evI3t;_! p;'at;n'ojl_!dv'A p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# st'o s'o@-rOkdv;'e t'ys;itS;i_! p;,Its'ot s;Imn'AttsVt; p@-rats'Ent dv;'es;t;I
dv'A (en)s'i:(ru) v'os;im; t'otS;ka# vOs;Ims'ot dv'Attsat;d;'evI3t; t'ys;itS;_! s;,Ims'ot SE#z;d;is;'jatv'os;im; p@-rats'Ent dv'AttsVt; (en)s'i:(ru) p@-rats'Ent dv;'es;t;I p@-rats'Ent dv'A (en)s'i:(ru) p;'At; t'otS;ka# S,E#s;t;s'ot SE#z;d;is;'jatd;'evI3t; t'ys;itS;_! s'o@-rOkV#d;'in_ p@-rats'Ent dv'AttsVt; tS;It'yR;I t'otS;ka# p;Vd;d;Is;'jatd;'evI3t; t'ys;itS;_! S,E#s;t;s'ot d;iv;In'ostOd;'evI3t; p@-rats'Ent dv'A (en)s'i:(ru) d;'es;It; t'otS;ka# dv'A m;,IlI;'ona#_! S,E#s;t;s'ot p;Vd;d;Is;'jatdv;'e t'ys;itS;i_! t@-r;'ista# d;iv;In'ostOV#d;'in_ p@-rats'Ent dv'AttsVt; d;'es;It; t'otS;ka# dv;'es;t;I SE#z;d;is;'jatp;'At; t'ys;itS;_! dv;'es;t;I t@-r;'ittsat;v'os;im; p@-rats'Ent dv'A (en)s'i:(ru) d;'es;It; t'otS;ka# dv'A m;,IlI;'ona#_! S,E#s;t;s'ot p;Vd;d;Is;'jatdv;'e t'ys;itS;i_! t@-r;'ista# d;iv;In'ostOV#d;'in_ p@-rats'Ent dv'AttsVt; (en)z'Ed(ru) p@-rats'Ent dv'AttsVt; (en)'Em(ru) p@-rats'Ent dv;'e t'ys;itS;i d;'es;It; t'otS;ka# n'ojltR;'i_!s;'ejm v'os;im;S'Es;t;_!p;'At; p@-rats'Ent dv'A (en)s'i:(ru) d;E2v;Itn'AttsVt; t'otS;ka# s;,Ims'ot d;iv;In'ostOv'os;im; t'ys;itS;_! p;,Its'ot t@-r;'ittsat;s;'ejm p@-rats'Ent dv'AttsVt; (en)v'i:(ru) p@-rats'Ent dv;'es;t;I V#d;'in_ t'otS;ka# st'o s;'emd;E2s;VttS;It'yR;I t'ys;itS;i_! st'o v'os;I3md;E2s;atv'os;im; p@-rats'Ent dv'AttsVt; (en)'eI(ru) p@-rats'Ent dv;'es;t;I V#d;'in_ t'otS;ka# tS;it'yr;ista# v'os;I3md;E2s;atv'os;im; t'ys;itS;_! p;,Its'ot d;E2v;Itn'AttsVt; p@-rats'Ent dv'A (en)s'i:(ru) V#d;'in_ t'otS;ka# tS;it'yr;ista# v'os;I3md;E2s;atv'os;im; t'ys;itS;_! p;,Its'ot d;E2v;Itn'AttsVt; p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I V#d;'in_ p@-rats'Ent dv;'es;t;I t'otS;ka# vOs;Ims'ot p;Vd;d;Is;'jats;'ejm t'ys;itS;_! 
tS;it'yr;ista#" "C0.87569253%200%200%201%200.36817%2C-0.2019%201.0387963%2C1.0387963%200%200%201%200.419637%2C-0.02375%20c%200.30087%2C0.04514%200.582739%2C0.216942%200.750593%2C0.470308%200.09502%2C0.142517%200.153603%2C0.308788%200.18844%2C0.478226%200.03484%2C0.168646%200.0475%2C0.340459%200.05701%2C0.513064%200.03167%2C0.601741%200.03167%2C1.205067%200.01426%2C1.808392%20-0.01426%2C0.526524%20-0.04355%2C1.0673%20-0.253366%2C1.549486%20-0.271575%2C0.619159%20-0.817101%2C1.08155%20-1.405383%2C1.414092%20a%205.5835296%2C5.5835296%200%200%201%20-1.257323%2C0.512272%20c%200.38163%2C1.219319%200.580363%2C2.56532%200.580363%2C3.93349%20a%2013.935071%2C13.935071%200%200%201%20-0.106901%2C1.682498%2010.264446%2C10.264446%200%200%200%205.054631%2C-8.829768%20c%200%2C-5.669041%20-4.59699%2C-10.2652391%20-10.265238%2C-10.2652391%20z%20M%2010.037865%2C19.798537%20v%201.174188%20a%201.488519%2C1.488519%200%200%201%200.857482%2C0.286619%201.3760882%2C1.3760882%200%200%201%200.440222%2C0.538402%20c%200.0966%2C0.213775%200.131432%2C0.456056%200.09184%2C0.687252%20a%201.1821057%2C1.1821057%200%200%201%20-0.262867%2C0.560568%201.3040376%2C1.3040376%200%200%201%20-0.502772%2C0.36263%201.3760882%2C1.3760882%200%200%201%20-0.623119%2C0.0966%20v%202.953287%20l%206.145683%2C-3.33175%20-6.145683%2C-3," "ru sum strings"

# A deleted phoneme at the start of a word should preserve the sourceix property.
test_phon en-GB-x-gbcwmd "aI 'av" "I have"

# A deleted phonSWITCH should preserve the sourceix property of the deleted phoneme.
test_phon hyw "'a g@rn'am" "A Կրնամ"

# Clause punctuation: the expected phoneme string has a line break at each
# '.', ',' and ':' of the input, and none at the plain space between "B" and "C".
test_phon en "'eI
b'i: s'i:
d'i:
'i:
'Ef" "A. B C, D. E: F."

# Handling of an English acronym followed by a genitive 's ("u.s.a.'s")
# without breaking the handling of single quotes.
test_phon en "hi: s'Ed
aI w0z b'O@n InD@ j,u:,Es'eI z_:_: and l'Eft" "He said : I was born in the u.s.a.'s and left."

# Same sentence frame, but with text enclosed in real single quotes.
# NOTE(review): presumably the companion case showing that quoted text is still
# handled correctly -- confirm. The expected output has a line break after the
# colon, after the comma and after the quoted sentence.
test_phon en "hi: s'Ed
h@l'oU
aIa#m b'ak
and l'Eft" "He said : 'Hello, I am back.' and left."

# Hawaiian okina to apostrophe
test_phon en "h@w'aIi:" "Hawaiʻi"

#----- Emoji [http://www.unicode.org/reports/tr51/tr51-12.html] -----

# ED-3 - emoji_character
test_phon en "'e@ri:z" "♈"
test_phon my "pru3'a2nke3DN_|_| mj@@lu3'a2mj@2tsnh,a2N_|_| nm_|n'e3DN_|_| mj@@nh,ats" "😙"
test_phon ka "k'ots#nis g'amomx,atveli s'axe m'omQim,are t#v'alebit#" "😙"
test_phon ka "'imedg,ats#ru,ebuli m'agram Sv'ebis g'amomx,atveli s'axe" "😥"
# TODO: Fix adjacent emoji without whitespace so the pronunciation is separated by a space.
test_phon en "'e@ri:zr'eInboU" "♈🌈"
test_phon en "'e@ri:z r'eInboU" "♈ 🌈"

# multi-word emoji
test_phon en "Ekskla#m'eIS@n kw'EstS@n m'A@k" "⁉"
test_phon en "Ekskla#m'eIS@n kw'EstS@n m'A@k r'eInboU" "⁉ 🌈"
test_phon en "r'oUlIN 0nD@ fl'o@ l'aafIN" "🤣" # skip words

# bug: https://github.com/espeak-ng/espeak-ng/issues/471
test_phon sk "sm'eju:tsa s'a tv'a:R" "☺"
test_phon sk "bl'ax sm'eju:tsa s'a tv'a:R" "blah ☺"

# bug: https://github.com/espeak-ng/espeak-ng/issues/853
test_phon fr "profEs'Wr" "PR"

# https://github.com/espeak-ng/espeak-ng/pull/1148
test_phon fr "dY mil v'E~" "2020"

# https://github.com/espeak-ng/espeak-ng/issues/1271
test_phon en "f'u:" "foo.."

# Note: there is a 0xAD character (soft hyphen, U+00AD) between "Ignoring" and "u+ad"; it is not whitespace.
test_phon en "Ign'o@rINg,u: pl'Vs 'ad" "Ignoring­u+ad"
# Note: there is a U+200C character (zero-width non-joiner) between the two 1s.
test_phon fa "j'ek j'ek" "1‌1"

# Mandarin and Cantonese Chinese with different Latin character fallback
test_phon yue "n'ei5_| h'ou2_| m'aa5_| (en)h@1l'oU1_| D'e@1(yue)_|" "你好馬 hello there"
test_phon yue-latn-jyutping "n'ei5_| h'ou2_| m'aa5_| n'ei5_| h'ou2_| m'aa5_|" "你好馬 nei5 hou2 maa5"
test_phon cmn "ni35X'Au35_| m'A21_| (en)h@44loU11_| De@11(cmn)_|" "你好馬 hello there"
test_phon cmn-latn-pinyin "ni35X'Au35_| m'A35_| n'i35_| X'Au35_| m'A214_|" "你好馬 ni3 hao3 ma3"

# Two-character Mandarin input; the expected phonemes form one unbroken group
# (no space inside it).
test_phon cmn "thiE55nt'i51_|" "天地"

# https://github.com/espeak-ng/espeak-ng/issues/1507
test_phon ml "k'a
t'a
p'a
k'e:r@l.@m
t'a:m@ra
p'o:le" "ക: ത: പ: കേരളം: താമര: പോലെ:"


# https://github.com/espeak-ng/espeak-ng/pull/1743
test_phon ru "k'oS(en)k'A:(ru)" "кошKa"
test_phon ru "'ju(en)m'Vni(ru)" "юMoney"
test_phon ru "'ju(en)m'Vni(ru)" "ЮMoney"
